#!/usr/bin/env bash
# Capture the working directory without clobbering the shell's built-in PWD.
WORK_DIR="$(pwd)"
HDFS_PATH="/user/graph_builder/data/lz-whole"
CHANGSHA_DATA=/home/dig/graph_data/changsha_data
HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop
CORRECT_DATA=/home/dig/graph_data/correct_data
HIVE_DATA_DIR=/mnt/changsha/all_json_data_tmp/

arango_tables="Person Company invest officer party_bid sue_relate plaintiff_relate defendant_relate news_entity_relate"
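# Person and Company look like vertex collections and the remaining names like
# edge collections (an assumption based on the names alone); each has a
# matching <table>_txt image directory under the version on HDFS.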

# Graph-build versions: take the 10 most recent parsed changsha versions from HDFS.
versions_path=$(${HADOOP_HOME}/bin/hadoop fs -ls ${HDFS_PATH}/parsed* | grep changsha | awk '{print $NF}' | sort | tail -n 10)
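# A sketch of what the pipeline above relies on: `hadoop fs -ls` puts the full
# path in its last column, e.g. (illustrative entry only):
#   drwxr-xr-x   - graph_builder supergroup  0 2019-01-01 12:00 /user/graph_builder/data/lz-whole/parsed_.../...
# so `awk '{print $NF}'` keeps the path, and `sort | tail -n 10` keeps the 10
# newest versions, assuming version names sort lexicographically by recency.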
last_version_path=""
for vp in ${versions_path}
do
    echo ${vp}
    # Process a version only if it has not been marked DUMPED on HDFS, has not
    # already been staged locally, and its full image data exists.
    dumped=$(${HADOOP_HOME}/bin/hadoop fs -ls ${vp} | grep DUMPED)
    version=$(echo ${vp} | awk -F'/' '{print $6}')   # version id = 6th '/'-separated path segment
    hasImage=$(${HADOOP_HOME}/bin/hadoop fs -ls ${HDFS_PATH}/${version} | grep image)
    echo ${version}
    if [ -z "${dumped}" ] && [ ! -d "${CHANGSHA_DATA}/data_${version}" ] && [ -n "${hasImage}" ]; then
        # Start from a clean local staging directory.
        if [ -d "${CHANGSHA_DATA}/data" ]; then
            rm -r "${CHANGSHA_DATA}/data"
        fi
        mkdir -p ${CHANGSHA_DATA}/data/json
        mkdir -p ${CHANGSHA_DATA}/data/image

        # Copy the full image data, one <table>_txt directory per collection.
        for table in ${arango_tables}
        do
            ${HADOOP_HOME}/bin/hadoop fs -copyToLocal ${HDFS_PATH}/${version}/image/${table}_txt ${CHANGSHA_DATA}/data/image
        done
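        # Expected local layout after the loop (illustrative, one directory per table):
        #   ${CHANGSHA_DATA}/data/image/Person_txt/
        #   ${CHANGSHA_DATA}/data/image/Company_txt/
        #   ...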
        
        # (Alternative) copy only the filtered image data instead of the full set:
        #${HADOOP_HOME}/bin/hadoop fs -copyToLocal ${vp}/image/* ${CHANGSHA_DATA}/data/image

        # copy json data
        ${HADOOP_HOME}/bin/hadoop fs -copyToLocal ${vp}/json/* ${CHANGSHA_DATA}/data/json
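        # source_version is written by Hadoop as part-* files; `fs -cat` below
        # concatenates them into a single local manifest file.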
        ${HADOOP_HOME}/bin/hadoop fs -cat ${HDFS_PATH}/${version}/source_version/part* > ${CHANGSHA_DATA}/data/source_version
        # Merge each collection's Hadoop part-files into a single <collection>.json.
        for i in $(ls ${CHANGSHA_DATA}/data/json); do
            if [ -d "${CHANGSHA_DATA}/data/json/${i}" ]; then
                cat ${CHANGSHA_DATA}/data/json/${i}/part* > ${CHANGSHA_DATA}/data/json/${i}.json
                rm -rf ${CHANGSHA_DATA}/data/json/${i}
            fi
        done
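        # Illustrative before/after of the merge above (hypothetical collection name):
        #   before: data/json/Person/part-00000, data/json/Person/part-00001, ...
        #   after:  data/json/Person.json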

        mv ${CHANGSHA_DATA}/data ${CHANGSHA_DATA}/data_${version}
        last_version_path=${CHANGSHA_DATA}/data_${version}
        touch "${WORK_DIR}/DUMPED"
        ${HADOOP_HOME}/bin/hadoop fs -put "${WORK_DIR}/DUMPED" ${vp}/
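        # Design note: data is staged under data/ and only mv'd to data_${version}
        # once all copies succeed, so an interrupted run leaves no half-complete
        # data_${version} directory; the DUMPED marker on HDFS then keeps later
        # runs from re-dumping this version even if the local copy is removed.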


        # Copy the dumped version into the hive ETL directory.
        cp -rf ${CHANGSHA_DATA}/data_${version}  ${HIVE_DATA_DIR}
    fi
done

# Overlay correct_data onto the latest version.
if [ "${last_version_path}" != "" ];then
    echo "lastes version: "${last_version_path}
    mkdir ${last_version_path}/correct
    cp -r ${CORRECT_DATA}/* ${last_version_path}/correct
fi
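# Note: only the newest version processed in this run receives a correct/
# directory; earlier versions dumped in the same pass are left unchanged.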

